package utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import net.didion.jwnl.JWNL;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.dictionary.Dictionary;

/**
 * A utility for generating data files representing WordNet entries for nouns,
 * verbs and adjectives. It generates array lists of
 * {@link net.didion.jwnl.data.IndexWord}s.
 * The purpose of this class is to pre-generate word lists which can then
 * be processed more quickly in the {@link aipj.assignment1.utils.WordNetWrapper}
 * utility used in Assignment 1.
 * <p>This code does not need to be altered for AIPJ assignment 1. It
 * may be useful for MSc projects.</p>
 * 
 * @author Judy Robertson
 */

public class WordNetWords implements Serializable {

    // NOTE(review): the class is declared Serializable but declares no
    // serialVersionUID and holds a Dictionary reference that is not transient.
    // Only the word lists are written out by writeData; confirm whether
    // instances of this class are ever serialised themselves.

    /**
     * A WordNet dictionary from which the parts of speech words are extracted.
     * Remains <code>null</code> if JWNL could not be initialised.
     */
    private Dictionary d;

    /**
     * A list of all the verbs in WordNet
     */
    private List verbList = new ArrayList();

    /**
     * A list of all the nouns in WordNet
     */
    private List nounList = new ArrayList();

    /**
     * A list of all the adjectives in WordNet
     */
    private List adjectiveList = new ArrayList();

    /**
     * British National Corpus data mapping words to frequencies.
     * Remains <code>null</code> if the BNC file could not be read.
     */
    private Map BNC;

    /**
     * Opens the WordNet dictionary, and reads in BNC frequency data. Iterates
     * over verbs, nouns and adjectives to build lists for each part of speech
     * containing only words whose lemma has an entry in the BNC data.
     * <p>If either the dictionary or the BNC data cannot be loaded, an error
     * is reported on standard error and the three lists are left empty
     * instead of failing with a NullPointerException.</p>
     */
    public WordNetWords() {
        d = loadDictionary();
        readinBNC();

        if (!isReady()) {
            System.err.println("WordNet dictionary or BNC data unavailable; "
                + "word lists have not been built");
            return;
        }

        collectFamiliarWords(POS.VERB, verbList);
        collectFamiliarWords(POS.NOUN, nounList);
        collectFamiliarWords(POS.ADJECTIVE, adjectiveList);
    }

    /**
     * Initialises JWNL from <code>jwnl/file_properties.xml</code> under the
     * current working directory and returns the dictionary instance.
     *
     * @return the dictionary, or <code>null</code> if initialisation failed
     */
    private static Dictionary loadDictionary() {
        String propertiesPath = System.getProperty("user.dir") + File.separator
            + "jwnl" + File.separator + "file_properties.xml";
        FileInputStream in = null;
        try {
            in = new FileInputStream(propertiesPath);
            JWNL.initialize(in);
            return Dictionary.getInstance();
        } catch (IOException e) {
            System.err.println("An IO error when loading wordnet");
            e.printStackTrace();
        } catch (JWNLException err) {
            System.err.println("A JWNL exception occured during intialisation");
            err.printStackTrace();
        } finally {
            closeQuietly(in);
        }
        return null;
    }

    /**
     * Reports whether both the dictionary and the BNC data were loaded.
     *
     * @return <code>true</code> if word lists can be built
     */
    private boolean isReady() {
        return d != null && BNC != null;
    }

    /**
     * Iterates over the WordNet index words for one part of speech and
     * appends to <code>target</code> every IndexWord whose lemma has a
     * non-null entry in the BNC map. This weeds out words with low
     * familiarity according to the BNC.
     *
     * @param pos The part of speech to iterate over
     * @param target The list that receives the matching IndexWords
     */
    private void collectFamiliarWords(POS pos, List target) {
        try {
            Iterator words = d.getIndexWordIterator(pos);
            while (words.hasNext()) {
                IndexWord word = (IndexWord) words.next();
                if (BNC.get(word.getLemma()) != null) {
                    target.add(word);
                }
            }
        } catch (JWNLException e) {
            System.err.println(
                "An error occurred while attempting to get a word index iterator");
            e.printStackTrace();
        }
    }

    /**
     * Builds the location of a file in the <code>wordnet/data</code>
     * directory under the user's home directory.
     *
     * @param fileName The simple name of the data file
     * @return the file inside the data directory
     */
    private static File dataFile(String fileName) {
        return new File(System.getProperty("user.home") + File.separator
            + "wordnet" + File.separator + "data" + File.separator + fileName);
    }

    /**
     * Maps a part of speech name to the list built for it.
     *
     * @param pos "noun", "adjective" or "verb"
     * @return the matching list, or <code>null</code> for any other value
     */
    private List listFor(String pos) {
        if ("noun".equals(pos)) {
            return nounList;
        }
        if ("adjective".equals(pos)) {
            return adjectiveList;
        }
        if ("verb".equals(pos)) {
            return verbList;
        }
        return null;
    }

    /**
     * Write out the data for the specified part of speech to disc as a
     * serialised list, in <code>&lt;pos&gt;.dat</code> in the data directory.
     * An unrecognised part of speech is reported and no file is touched.
     *
     * @param pos The part of speech for which the data should be written
     */
    private void writeData(String pos) {
        // Resolve the list first so an unknown pos never truncates a file.
        List words = listFor(pos);
        if (words == null) {
            System.err.println("Unknown part of speech '" + pos
                + "'; no data written");
            return;
        }

        FileOutputStream fstrm = null;
        ObjectOutputStream ostrm = null;
        try {
            fstrm = new FileOutputStream(dataFile(pos + ".dat"));
            ostrm = new ObjectOutputStream(fstrm);
            ostrm.writeObject(words);
            // Flush explicitly so write failures surface here, not in close().
            ostrm.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ostrm);
            // Covers the case where wrapping the file stream failed.
            closeQuietly(fstrm);
        }
    }

    /**
     * Read in the file containing the serialised BNC data structure from disc
     * (<code>bncobjects.dat</code> in the data directory). On failure an error
     * is reported on standard error and the BNC field is left unchanged.
     */
    public void readinBNC() {
        FileInputStream fin = null;
        ObjectInputStream istrm = null;
        try {
            fin = new FileInputStream(dataFile("bncobjects.dat"));
            istrm = new ObjectInputStream(fin);
            BNC = (Map) istrm.readObject();
        } catch (IOException e) {
            System.err.println("Trouble processing BNC file");
            e.printStackTrace();
        } catch (ClassNotFoundException c) {
            System.err.println(
                "Can't find class when trying to read serialised BNC words");
            c.printStackTrace();
        } finally {
            closeQuietly(istrm);
            // Covers the case where wrapping the file stream failed.
            closeQuietly(fin);
        }
    }

    /**
     * Closes a stream, ignoring a null argument and any failure on close.
     *
     * @param resource The stream to close, may be <code>null</code>
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if close itself fails.
        }
    }

    /**
     * Build data structures for nouns, verbs and adjectives from the WordNet data
     * and write them to disc for future use. If the WordNet or BNC data could
     * not be loaded, nothing is written and the program exits with status 1,
     * so existing data files are not replaced by empty lists.
     *
     *  @param args None required
     */
    public static void main(String[] args) {
        WordNetWords w = new WordNetWords();
        if (!w.isReady()) {
            System.err.println("No data files were written");
            System.exit(1);
        }
        w.writeData("noun");
        w.writeData("verb");
        w.writeData("adjective");
    }
}
